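# Best configurations found by SMAC, with and without NAS.
# "full" means the best configs after running for 50 iterations; "comparable" means the best configs after 8
# iterations (which is comparable in resources to what we've used).
# NOTE(review): "comparable" presumably corresponds to the `*_same_resource` keys below -- TODO confirm.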
# Lookup table of hyperparameter sets, keyed by
# "<environment>_<nas|no_nas>_<full|same_resource>[_anneal]".
# Every entry is an independent dict; entries whose key contains "_nas_" carry six
# extra "NAS_*" fields ahead of the nine common fields (one exception is flagged below).
# NOTE(review): several keys hold identical value sets (e.g. "halfcheetah_nas_full" and
# "halfcheetah_nas_same_resource"); the values are reproduced verbatim -- confirm intended.
smac_params = {
    # ------------------------------------------------------------------
    # with annealing -- reflecting rebuttal
    # ------------------------------------------------------------------
    "ur5e_nas_same_resource_anneal": {
        "NAS_policy_log2_width": 7,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 7,
        "NAS_q_num_layers": 4,
        "NAS_q_use_spectral_norm": 0,
        "discounting": 0.9160581261991341,
        "lambda_": 0.9825438883092134,
        "log10_entropy_cost": -4.353610932337263,
        "log10_lr": -3.1601467365477376,
        "log2_batch_size": 6,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.16827446519764194,
        "reward_scaling": 0.3279649923814395,
        "unroll_length": 13,
    },
    "ur5e_no_nas_same_resource_anneal": {
        "discounting": 0.9396929935903785,
        "lambda_": 0.9015766642514311,
        "log10_entropy_cost": -5.050395380494534,
        "log10_lr": -3.068826861759063,
        "log2_batch_size": 8,
        "num_update_epochs": 4,
        "ppo_epsilon": 0.1284912073448547,
        "reward_scaling": 0.5512377281275812,
        "unroll_length": 11,
    },
    "halfcheetah_nas_same_resource_anneal": {
        "NAS_policy_log2_width": 6,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.989824011230548,
        "lambda_": 0.9744946609957571,
        "log10_entropy_cost": -5.193996434761918,
        "log10_lr": -3.1057651372844823,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22535319602377557,
        "reward_scaling": 0.07543739911945412,
        "unroll_length": 10,
    },
    "halfcheetah_no_nas_same_resource_anneal": {
        "discounting": 0.9338669486739279,
        "lambda_": 0.9705448197019788,
        "log10_entropy_cost": -2.230466385353436,
        "log10_lr": -3.0634118552754,
        "log2_batch_size": 8,
        "num_update_epochs": 5,
        "ppo_epsilon": 0.2037964425215956,
        "reward_scaling": 0.07379144394398576,
        "unroll_length": 11,
    },
    "ant_nas_same_resource_anneal": {
        "NAS_policy_log2_width": 6,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.989824011230548,
        "lambda_": 0.9744946609957571,
        "log10_entropy_cost": -5.193996434761918,
        "log10_lr": -3.1057651372844823,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22535319602377557,
        "reward_scaling": 0.07543739911945412,
        "unroll_length": 10,
    },
    "ant_no_nas_same_resource_anneal": {
        "discounting": 0.9338669486739279,
        "lambda_": 0.9705448197019788,
        "log10_entropy_cost": -2.230466385353436,
        "log10_lr": -3.0634118552754,
        "log2_batch_size": 8,
        "num_update_epochs": 5,
        "ppo_epsilon": 0.2037964425215956,
        "reward_scaling": 0.07379144394398576,
        "unroll_length": 11,
    },
    # ------------------------------------------------------------------
    # without annealing -- initial exps
    # ------------------------------------------------------------------
    # Keys named "*_no_nas_*".
    "halfcheetah_no_nas_full": {
        "discounting": 0.9832170120017022,
        "lambda_": 0.9215836319188409,
        "log10_entropy_cost": -1.5303751830730867,
        "log10_lr": -3.0000287534035532,
        "log2_batch_size": 6,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.15652792826255765,
        "reward_scaling": 7.9197857108140415,
        "unroll_length": 10,
    },
    "halfcheetah_no_nas_same_resource": {
        "discounting": 0.9338669486739279,
        "lambda_": 0.9705448197019788,
        "log10_entropy_cost": -2.230466385353436,
        "log10_lr": -3.0634118552754,
        "log2_batch_size": 8,
        "num_update_epochs": 5,
        "ppo_epsilon": 0.2037964425215956,
        "reward_scaling": 0.07379144394398576,
        "unroll_length": 11,
    },
    "ant_no_nas_full": {
        "discounting": 0.9658790035444239,
        "lambda_": 0.9382182288035703,
        "log10_entropy_cost": -1.0446816030098764,
        "log10_lr": -3.822271141055585,
        "log2_batch_size": 6,
        "num_update_epochs": 6,
        "ppo_epsilon": 0.274824351207112,
        "reward_scaling": 2.167289972525522,
        "unroll_length": 8,
    },
    "ant_no_nas_same_resource": {
        "discounting": 0.9304199001622862,
        "lambda_": 0.9844858845239164,
        "log10_entropy_cost": -5.699283909996239,
        "log10_lr": -3.6495879615419913,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.21485178081793832,
        "reward_scaling": 0.10433471360854171,
        "unroll_length": 7,
    },
    "humanoid_no_nas_full": {
        "discounting": 0.9877215443463575,
        "lambda_": 0.996622078082515,
        "log10_entropy_cost": -5.966499469965771,
        "log10_lr": -3.299462648745925,
        "log2_batch_size": 10,
        "num_update_epochs": 14,
        "ppo_epsilon": 0.1326764990518262,
        "reward_scaling": 6.731329554027374,
        "unroll_length": 15,
    },
    "humanoid_no_nas_same_resource": {
        "discounting": 0.97,
        "lambda_": 0.95,
        "log10_entropy_cost": -2.0,
        "log10_lr": -3.5228787453,
        "log2_batch_size": 10,
        "num_update_epochs": 4,
        "ppo_epsilon": 0.2,
        "reward_scaling": 10.0,
        "unroll_length": 5,
    },
    "ur5e_no_nas_full": {
        "discounting": 0.9535737128397871,
        "lambda_": 0.9983420657966122,
        "log10_entropy_cost": -5.9397026093296095,
        "log10_lr": -3.018103922917397,
        "log2_batch_size": 10,
        "num_update_epochs": 12,
        "ppo_epsilon": 0.29430488893282303,
        "reward_scaling": 0.06525686604701961,
        "unroll_length": 12,
    },
    "ur5e_no_nas_same_resource": {
        "discounting": 0.9400989183855041,
        "lambda_": 0.9248659851527554,
        "log10_entropy_cost": -5.0866458875291,
        "log10_lr": -3.114613514119878,
        "log2_batch_size": 10,
        "num_update_epochs": 4,
        "ppo_epsilon": 0.2522346887831256,
        "reward_scaling": 5.139263761155616,
        "unroll_length": 10,
    },
    "fetch_no_nas_full": {
        "discounting": 0.9926048585191203,
        "lambda_": 0.9143966683809505,
        "log10_entropy_cost": -5.9148042048664236,
        "log10_lr": -3.971810214765201,
        "log2_batch_size": 10,
        "num_update_epochs": 14,
        "ppo_epsilon": 0.10348827407208394,
        "reward_scaling": 9.787761918851622,
        "unroll_length": 15,
    },
    "fetch_no_nas_same_resource": {
        "discounting": 0.9928592633595533,
        "lambda_": 0.9004507055130907,
        "log10_entropy_cost": -4.420912423408083,
        "log10_lr": -3.8485819319182317,
        "log2_batch_size": 9,
        "num_update_epochs": 6,
        "ppo_epsilon": 0.3020465999679072,
        "reward_scaling": 15.917835592783108,
        "unroll_length": 7,
    },
    "reacher_no_nas_full": {
        "discounting": 0.968603669432952,
        "lambda_": 0.9510522944973988,
        "log10_entropy_cost": -3.6920878723934223,
        "log10_lr": -3.998689291551331,
        "log2_batch_size": 7,
        "num_update_epochs": 10,
        "ppo_epsilon": 0.2809706632457645,
        "reward_scaling": 0.11020249023815507,
        "unroll_length": 13,
    },
    "reacher_no_nas_same_resource": {
        "discounting": 0.9716498924957241,
        "lambda_": 0.9457367781792819,
        "log10_entropy_cost": -3.0582525700297083,
        "log10_lr": -3.6268911278478386,
        "log2_batch_size": 8,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22564939510939577,
        "reward_scaling": 0.5982607609257286,
        "unroll_length": 10,
    },
    "hopper_no_nas_full": {
        "discounting": 0.9536380993423758,
        "lambda_": 0.9532038459366249,
        "log10_entropy_cost": -1.4383331518852494,
        "log10_lr": -3.886176129633295,
        "log2_batch_size": 6,
        "num_update_epochs": 4,
        "ppo_epsilon": 0.20092290924315567,
        "reward_scaling": 2.6540241526050443,
        "unroll_length": 12,
    },
    "hopper_no_nas_same_resource": {
        "discounting": 0.9657788541715799,
        "lambda_": 0.970925050467342,
        "log10_entropy_cost": -2.9669937584764243,
        "log10_lr": -3.896940404477963,
        "log2_batch_size": 8,
        "num_update_epochs": 5,
        "ppo_epsilon": 0.25726847137547115,
        "reward_scaling": 0.07114580340025169,
        "unroll_length": 10,
    },
    # Keys named "*_nas_*".
    "halfcheetah_nas_full": {
        "NAS_policy_log2_width": 6,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.989824011230548,
        "lambda_": 0.9744946609957571,
        "log10_entropy_cost": -5.193996434761918,
        "log10_lr": -3.1057651372844823,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22535319602377557,
        "reward_scaling": 0.07543739911945412,
        "unroll_length": 10,
    },
    "halfcheetah_nas_same_resource": {
        "NAS_policy_log2_width": 6,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.989824011230548,
        "lambda_": 0.9744946609957571,
        "log10_entropy_cost": -5.193996434761918,
        "log10_lr": -3.1057651372844823,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22535319602377557,
        "reward_scaling": 0.07543739911945412,
        "unroll_length": 10,
    },
    "ant_nas_full": {
        "NAS_policy_log2_width": 7,
        "NAS_policy_num_layers": 1,
        "NAS_policy_use_spectral_norm": 0,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.9708981343089256,
        "lambda_": 0.9967249818478758,
        "log10_entropy_cost": -4.766317483883446,
        "log10_lr": -3.8340082661220634,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.14156246217048526,
        "reward_scaling": 0.09911938007116479,
        "unroll_length": 7,
    },
    "ant_nas_same_resource": {
        "NAS_policy_log2_width": 5,
        "NAS_policy_num_layers": 2,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 2,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.97,
        "lambda_": 0.95,
        "log10_entropy_cost": -2.0,
        "log10_lr": -3.5228787453,
        "log2_batch_size": 10,
        "num_update_epochs": 4,
        "ppo_epsilon": 0.2,
        "reward_scaling": 10.0,
        "unroll_length": 5,
    },
    "humanoid_nas_full": {
        "NAS_policy_log2_width": 7,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 0,
        "NAS_q_log2_width": 6,
        "NAS_q_num_layers": 5,
        "NAS_q_use_spectral_norm": 0,
        "discounting": 0.9970702803161458,
        "lambda_": 0.9091281899805631,
        "log10_entropy_cost": -3.912472236863973,
        "log10_lr": -3.3536491403865307,
        "log2_batch_size": 7,
        "num_update_epochs": 5,
        "ppo_epsilon": 0.13274525873651655,
        "reward_scaling": 0.2352925925795548,
        "unroll_length": 11,
    },
    "humanoid_nas_same_resource": {
        "NAS_policy_log2_width": 6,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.989824011230548,
        "lambda_": 0.9744946609957571,
        "log10_entropy_cost": -5.193996434761918,
        "log10_lr": -3.1057651372844823,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22535319602377557,
        "reward_scaling": 0.07543739911945412,
        "unroll_length": 10,
    },
    "ur5e_nas_full": {
        "NAS_policy_log2_width": 8,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 8,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.9515256479982855,
        "lambda_": 0.9827230872422089,
        "log10_entropy_cost": -5.65788150422563,
        "log10_lr": -3.1412274684982715,
        "log2_batch_size": 6,
        "num_update_epochs": 13,
        "ppo_epsilon": 0.13874397856880508,
        "reward_scaling": 1.4356189970625488,
        "unroll_length": 14,
    },
    "ur5e_nas_same_resource": {
        "NAS_policy_log2_width": 8,
        "NAS_policy_num_layers": 3,
        "NAS_policy_use_spectral_norm": 0,
        "NAS_q_log2_width": 8,
        "NAS_q_num_layers": 2,
        "NAS_q_use_spectral_norm": 0,
        "discounting": 0.9743673487485481,
        "lambda_": 0.9231943690307993,
        "log10_entropy_cost": -4.933291157183666,
        "log10_lr": -3.1752187476300047,
        "log2_batch_size": 9,
        "num_update_epochs": 12,
        "ppo_epsilon": 0.39898843668019335,
        "reward_scaling": 4.10651341700757,
        "unroll_length": 11,
    },
    "fetch_nas_full": {
        "NAS_policy_log2_width": 7,
        "NAS_policy_num_layers": 3,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 8,
        "NAS_q_num_layers": 2,
        "NAS_q_use_spectral_norm": 0,
        "discounting": 0.9916180008580232,
        "lambda_": 0.9756697071655699,
        "log10_entropy_cost": -5.054727056555046,
        "log10_lr": -3.2002857541684095,
        "log2_batch_size": 9,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.32360567019986536,
        "reward_scaling": 3.66123319925151,
        "unroll_length": 8,
    },
    "fetch_nas_same_resource": {
        "NAS_policy_log2_width": 6,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.989824011230548,
        "lambda_": 0.9744946609957571,
        "log10_entropy_cost": -5.193996434761918,
        "log10_lr": -3.1057651372844823,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22535319602377557,
        "reward_scaling": 0.07543739911945412,
        "unroll_length": 10,
    },
    "hopper_nas_full": {
        "NAS_policy_log2_width": 7,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 0,
        "NAS_q_log2_width": 6,
        "NAS_q_num_layers": 5,
        "NAS_q_use_spectral_norm": 0,
        "discounting": 0.9970702803161458,
        "lambda_": 0.9091281899805631,
        "log10_entropy_cost": -3.912472236863973,
        "log10_lr": -3.3536491403865307,
        "log2_batch_size": 7,
        "num_update_epochs": 5,
        "ppo_epsilon": 0.13274525873651655,
        "reward_scaling": 0.2352925925795548,
        "unroll_length": 11,
    },
    "hopper_nas_same_resource": {
        "NAS_policy_log2_width": 6,
        "NAS_policy_num_layers": 4,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 3,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.989824011230548,
        "lambda_": 0.9744946609957571,
        "log10_entropy_cost": -5.193996434761918,
        "log10_lr": -3.1057651372844823,
        "log2_batch_size": 7,
        "num_update_epochs": 2,
        "ppo_epsilon": 0.22535319602377557,
        "reward_scaling": 0.07543739911945412,
        "unroll_length": 10,
    },
    # NOTE(review): this entry has no "NAS_*" fields and its values equal those of
    # "ur5e_no_nas_same_resource" -- looks like a possible copy/paste slip; confirm.
    "reacher_nas_full": {
        "discounting": 0.9400989183855041,
        "lambda_": 0.9248659851527554,
        "log10_entropy_cost": -5.0866458875291,
        "log10_lr": -3.114613514119878,
        "log2_batch_size": 10,
        "num_update_epochs": 4,
        "ppo_epsilon": 0.2522346887831256,
        "reward_scaling": 5.139263761155616,
        "unroll_length": 10,
    },
    "reacher_nas_same_resource": {
        "NAS_policy_log2_width": 7,
        "NAS_policy_num_layers": 1,
        "NAS_policy_use_spectral_norm": 1,
        "NAS_q_log2_width": 5,
        "NAS_q_num_layers": 4,
        "NAS_q_use_spectral_norm": 1,
        "discounting": 0.941713896068972,
        "lambda_": 0.909495224357209,
        "log10_entropy_cost": -2.903156814323129,
        "log10_lr": -3.0555261379638092,
        "log2_batch_size": 7,
        "num_update_epochs": 4,
        "ppo_epsilon": 0.21757979059010035,
        "reward_scaling": 3.283226146934889,
        "unroll_length": 5,
    },
}
